This notebook documents preliminary analysis of tracking data for fish tagged in Molokini Crater between 2020-05-16 and 2021-05-24.
The purpose of this study is to understand how human impacts affect the fish of Molokini Crater.
We are particularly interested in answering the following question: 1. Is the presence of fish affected by vessel presence?
Proposed Approach:
1. Begin by calculating the number of each species tagged and basic summary statistics
2. Calculate Metrics
   - Receiver Use
   - Pianka’s Niche Overlap
   - Residency
3. Make the following plots
   - Map - Receiver locations
   - Map - Average receiver use by Species
   - Scatterplot - Day night plots
   - Bar Plot - The number of detections per day (individual)
   - Bar Plot - The number of individuals detected (species)
   - Line Chart - The proportion of individuals detected n days after tagging (30 day moving average by species)
   - Bar Plot - Daily vessel traffic
   - Scatter Plot - Vessel traffic vs. proportion of fish detected in crater daily (scatterplot by species)
4. Perform the following statistical tests
   - Compare residency rates by species
   - Compare residency by species, size, and time at liberty
   - Create a GLM comparing # of individuals in crater regressed against boat traffic and species, using an AR(1) term on the dependent variable on some time scale (daily? 6 hours? depends on the resolution of the vessel data)
## Project directory layout. Every path below is derived from project_directory.
project_directory = '/Users/stephenscherrer/Documents/Programming/Projects/Molokini'

# Outputs: results folder and the figures folder nested inside it
results_directory = file.path(project_directory, 'Results')
figure_directory = file.path(results_directory, 'Figures')

# Inputs: raw data and analysis scripts
data_directory = file.path(project_directory, 'Data')
scripts_directory = file.path(project_directory, 'Analysis Scripts')

## Run the shared utility script
# (presumably defines helpers used later, e.g. load_tagging_data and plot_day_night - confirm)
source(file.path(scripts_directory, 'Utility Functions.R'))
## Vessel Traffic data
# TODO: load the vessel traffic data into vessel_df once the source file/loader is decided.
# The assignment used to be left dangling ('vessel_df ='), so R parsed it together
# with the next statement and silently filled vessel_df with the tagging data.
# An explicit NULL placeholder keeps the name defined without that side effect.
vessel_df = NULL
## Metadata Files
# NOTE(review): the output recorded for this call reports "embedded nul(s)", which
# usually means a binary .xlsx file was read with a text/CSV reader - confirm that
# load_tagging_data() reads Excel files with an Excel-aware reader.
tagging_df = load_tagging_data(file.path(data_directory, 'Molokini_Fish_Tagging_master.xlsx'))
line 1 appears to contain embedded nullsline 2 appears to contain embedded nullsline 3 appears to contain embedded nullsline 4 appears to contain embedded nullsembedded nul(s) found in inputError in names(x) <- value :
'names' attribute [2] must be the same length as the vector [1]
## List the distinct tag IDs present in the detection data
unique(molo_df[['tag_id']])
[1] "47513" "30711" "30754" "51591" "51590" "51593" "39194" "30755" "51594"
## Count the number of distinct tag IDs recorded for each species in the tagging data
tags_by_species = setNames(
  aggregate(tag_id ~ species, data = tagging_df, FUN = uniqueN),
  c('species', 'n_tagged')
)
print(tags_by_species)
Overlap index scale: 0 = no overlap, 1 = perfect overlap.
Study Area
## Fetch a satellite basemap centred on the study-area coordinates
molo_basemap = get_map(
  location = c(lon = -156.496331, lat = 20.633007),
  maptype = 'satellite',
  zoom = 16
)
Source : https://maps.googleapis.com/maps/api/staticmap?center=20.633007,-156.496331&zoom=16&size=640x640&scale=2&maptype=satellite&language=en-EN&key=xxx-hggZe5I57UhGHb8
## Fetch a satellite basemap centred on the study-area coordinates
# NOTE(review): this repeats an identical get_map() call made just above; it looks
# like a duplicated notebook chunk - confirm whether the second request is needed.
molo_basemap = get_map(
  location = c(lon = -156.496331, lat = 20.633007),
  maptype = 'satellite',
  zoom = 16
)
Source : https://maps.googleapis.com/maps/api/staticmap?center=20.633007,-156.496331&zoom=16&size=640x640&scale=2&maptype=satellite&language=en-EN&key=xxx-hggZe5I57UhGHb8
## Map of receiver locations: plot every detection row whose receiver is not 'Tagging Location'
receiver_map = ggmap(molo_basemap) +
  geom_point(data = molo_df[molo_df$receiver != 'Tagging Location', ],
             mapping = aes(x = lon, y = lat), col = 'red') +
  labs(x = '°Longitude', y = '°Latitude')
# Save with an explicit plot argument. Chaining `+ ggsave()` onto the plot saved
# last_plot() (i.e. whatever was displayed previously, not this map) on older
# ggplot2 releases and is an error on ggplot2 >= 3.3.4.
ggsave(filename = 'Receiver Locations Google Map.pdf', plot = receiver_map, path = figure_directory)
Saving 7 x 7 in image
## Map of receiver locations: plot every detection row whose receiver is not 'Tagging Location'
receiver_map = ggmap(molo_basemap) +
  geom_point(data = molo_df[molo_df$receiver != 'Tagging Location', ],
             mapping = aes(x = lon, y = lat), col = 'red') +
  labs(x = '°Longitude', y = '°Latitude')
# Save with an explicit plot argument. Chaining `+ ggsave()` onto the plot saved
# last_plot() (i.e. whatever was displayed previously, not this map) on older
# ggplot2 releases and is an error on ggplot2 >= 3.3.4.
ggsave(filename = 'Receiver Locations Google Map.pdf', plot = receiver_map, path = figure_directory)
print(receiver_map)
## Make species plots for receiver use
# One map per species; point size is mapped to receiver_use.
# Loop over the distinct species: iterating over the raw column redrew and
# re-saved the same map once for every row of species_receiver_use.
for(species in unique(species_receiver_use$species)){
  receiver_use_by_spp = ggmap(molo_basemap) +
    # color is a fixed setting, so it belongs outside aes(); inside aes() the
    # string 'red' is treated as a data value (default palette + spurious legend).
    geom_point(data = species_receiver_use[species_receiver_use$species == species, ],
               mapping = aes(x = lon, y = lat, size = receiver_use), color = 'red') +
    labs(x = '°Longitude', y = '°Latitude')
  # Save this plot explicitly; `+ ggsave()` saved last_plot() (the previous
  # iteration's map) on old ggplot2 and errors on ggplot2 >= 3.3.4.
  ggsave(filename = paste('Receiver Use by ', species, '.pdf', sep = ''), plot = receiver_use_by_spp, path = figure_directory)
  print(receiver_use_by_spp)
}
Saving 7 x 7 in image
### Day Night Plots
## For all fish
plot_day_night(molo_df, plot_title = 'All Fish')
## By Species
for (spp in unique(molo_df$species)){
  # Tag IDs that have at least one row labelled with this species.
  # which() discards NA comparisons so missing species never select rows.
  spp_tags = unique(molo_df$tag_id[which(molo_df$species == spp)])
  # Set membership needs %in%. Comparing with `==` recycled the tag vector
  # element-by-element against the column, which selected the wrong rows and
  # raised the "longer object length is not a multiple" warnings.
  plot_day_night(molo_df[molo_df$tag_id %in% spp_tags, ], plot_title = spp)
}
longer object length is not a multiple of shorter object length
longer object length is not a multiple of shorter object length
longer object length is not a multiple of shorter object length
longer object length is not a multiple of shorter object length
## By Individual
# Iterate over the distinct tags. Looping over molo_df$tag_id directly redrew
# the same plot once for every detection row of that tag.
for (tag_id in unique(molo_df$tag_id)){
  # Title combines the species recorded for this tag in tagging_df with the tag ID
  plot_day_night(molo_df[molo_df$tag_id == tag_id, ], plot_title = paste(tagging_df$species[tagging_df$tag_id == tag_id], '- Tag', as.character(tag_id), sep = ' '))
}
NA
## Barplot of detections by individual
# Every column after the first is plotted against 'date' (first column assumed to
# be the date column - confirm). Negative indexing stays correct when there are
# no tag columns, whereas 2:ncol() would count backwards (2, 1).
for(column in colnames(detections_per_day_df)[-1]){
  detections_barplot = ggplot(data = detections_per_day_df, mapping = aes_string(x = 'date', y = column)) +
    geom_bar(stat = "identity") +
    # Plot title uses the second '_'-separated piece of the column name
    labs(title = paste('Tag ', strsplit(x = column, split = '_')[[1]][2], sep = ''), x = 'Date', y = 'Detections')
  # Save this plot explicitly. Inside a loop nothing is auto-printed, so the old
  # `+ ggsave()` chain saved a stale last_plot() (and errors on ggplot2 >= 3.3.4).
  ggsave(filename = paste('Daily Detection Barplot -', column, '.pdf'), plot = detections_barplot, path = figure_directory)
}
Saving 7 x 7 in image
## Convert detections_per_day to presence/absence
presence_absence_wide_df = detections_per_day_df
# All columns after the first are per-tag detection counts (first column is the id column 'date')
tag_columns = colnames(presence_absence_wide_df)[-1]
for (tag_column in tag_columns){
  # 1 if the tag was detected at least once that day, otherwise 0
  presence_absence_wide_df[[tag_column]] = as.numeric(presence_absence_wide_df[[tag_column]] > 0)
}
## Convert from wide to long format
presence_absence_long_df = melt(presence_absence_wide_df, id.vars = c('date'), measure.vars = tag_columns, variable.name = 'tag_id', value.name = 'detected')
# Drop 'tag_' prefix from tag_id column for matching purposes
# Vectorised: keep the second '_'-separated piece of each column label
# (same result as the former row-by-row loop, and safe for zero rows).
tag_labels = as.character(presence_absence_long_df$tag_id)
presence_absence_long_df$tag_id = vapply(strsplit(tag_labels, split = '_', fixed = TRUE), function(parts) parts[2], character(1))
## Merge with species from tagging data
# merge() takes `by`, not `on`; the old `on = 'tag_id'` was silently ignored and
# the join only worked because 'tag_id' happened to be the sole shared column.
presence_absence_long_df = merge(x = presence_absence_long_df, y = tagging_df[ ,c('tag_id', 'species')], by = 'tag_id')
## Drop date and tag pairs preceding the date the fish was tagged
# Look up each row's tagging date by tag_id (first match in tagging_df).
# NOTE(review): as.Date() on a date-time uses UTC by default - confirm that
# tagging_df$datetime converts to the intended local calendar date.
row_tagging_dates = as.Date(tagging_df$datetime[match(presence_absence_long_df$tag_id, tagging_df$tag_id)])
# Rows dated strictly before tagging are the ones to drop. This fixes three defects:
#   - `for(i in nrow(...))` only ever examined the last row;
#   - the `<=` test flagged rows on/after tagging, the opposite of the intent above;
#   - a tag with no (or NA) tagging date made `if` fail with
#     "missing value where TRUE/FALSE needed"; which() simply skips those rows.
indicies_to_drop = which(presence_absence_long_df$date < row_tagging_dates)
# NOTE(review): the rows are only identified here, not removed. When removing
# them, guard the empty case: df[-indicies_to_drop, ] with no indices drops every row.
Error in if (as.Date(tagging_df$datetime[tagging_df$tag_id == presence_absence_long_df$tag_id[i]]) <= :
missing value where TRUE/FALSE needed
### LOGIC HERE TO GET TO # BOATS / DAY
## Get max_vessels at any given time, total_vessels
# TODO: vessels_per_day (columns: date, max_vessels, total_vessels) still has to
# be built by the logic above before the plots below can run.

# Make plot for max_vessels
max_vessels_plot = ggplot(data = vessels_per_day, mapping = aes(x = date, y = max_vessels)) +
  geom_bar(stat = 'identity') +
  labs(title = 'Maximum Number of Co-occuring Vessels Daily', x = 'Date', y = '# of Vessels')
# Save explicitly (`+ ggsave()` saved last_plot() on old ggplot2 and errors on
# ggplot2 >= 3.3.4). The filename no longer pastes in the leftover `species`
# loop variable, which produced names like '...Daily.pdf <species>.pdf'.
ggsave(filename = 'Maximum Number of Co-occuring Vessels Daily.pdf', plot = max_vessels_plot, path = figure_directory)

# Make plot for total_vessels
total_vessels_plot = ggplot(data = vessels_per_day, mapping = aes(x = date, y = total_vessels)) +
  geom_bar(stat = 'identity') +
  # Title previously duplicated the max-vessels title (copy-paste error)
  labs(title = 'Total Number of Vessels Daily', x = 'Date', y = '# of Vessels')
ggsave(filename = 'Total Vessels Daily.pdf', plot = total_vessels_plot, path = figure_directory)

print(max_vessels_plot)
print(total_vessels_plot)
## Calculate residency
# residence_metric = unique_days / days_at_liberty, computed row-wise on detection_stats.
# Validate the inputs first: when either column is absent, `$` returns NULL and
# the division yields numeric(0), which surfaces as the cryptic
# "replacement has 0 rows" error instead of naming the missing column.
missing_residency_columns = setdiff(c('unique_days', 'days_at_liberty'), colnames(detection_stats))
if (length(missing_residency_columns) > 0){
  stop('detection_stats is missing required column(s): ', paste(missing_residency_columns, collapse = ', '), call. = FALSE)
}
# [[ ]] avoids the partial name matching that `$` performs on data frames
detection_stats$residence_metric = detection_stats[['unique_days']] / detection_stats[['days_at_liberty']]
Error in `$<-.data.frame`(`*tmp*`, residence_metric, value = numeric(0)) :
replacement has 0 rows, data has 2
## Get total days in the study
# Span in days between the first and last detection date across all tags
# (assumes molo_df$date is of class Date - confirm).
total_days_in_study = as.numeric(difftime(max(molo_df$date, na.rm = TRUE), min(molo_df$date, na.rm = TRUE), units = 'days'))
## Create a dataframe where rows are tag id and columns are study date
# Column k flags detection k - 1 days after the tag's first detection, so a tag
# seen on both the first and last study day needs total_days_in_study + 1 columns.
# (The old vector of length total_days_in_study was silently extended for such
# tags, producing rows of unequal length.)
study_tag_ids = unique(molo_df$tag_id)
n_day_columns = total_days_in_study + 1
presence_matrix = matrix(0, nrow = length(study_tag_ids), ncol = n_day_columns,
                         dimnames = list(NULL, as.character(seq_len(n_day_columns))))
## Determine if a tag was detected on a receiver n days after tagging
for (i in seq_along(study_tag_ids)){
  ## Unique detection dates for this individual (which() skips NA tag IDs)
  detected_dates = unique(molo_df$date[which(molo_df$tag_id == study_tag_ids[i])])
  detected_dates = detected_dates[!is.na(detected_dates)]
  if (length(detected_dates) == 0){
    next
  }
  # Days elapsed between the tag's earliest detection and each detection day;
  # flip the matching columns to 1
  days_since_first = as.numeric(difftime(detected_dates, min(detected_dates), units = 'days'))
  presence_matrix[i, days_since_first + 1] = 1
}
# Assemble once instead of rbind-ing onto an empty data.frame inside the loop.
# That approach coerced the 0/1 flags to character whenever tag IDs were strings
# (c(tag_id, difftimes)) and left a frame whose width did not match the column
# names, hence the "'names' attribute must be the same length" error.
present_after_n_days_df = data.frame(tag_id = study_tag_ids, presence_matrix, check.names = FALSE, stringsAsFactors = FALSE)
Error in names(x) <- value :
'names' attribute [3] must be the same length as the vector [2]
Calculate mean residency by species (regardless of time), then run an ANOVA by species. Use Tukey’s HSD to determine which species differ significantly.
## One-way ANOVA: residence_metric modelled as a function of species
residence_by_species_anova = aov(
  formula = residence_metric ~ species,
  data = detection_stats
)
Error in eval(predvars, data, env) : object 'residence_metric' not found
GLM comparing size and residency time by species. Independent variables: size and time at liberty. Dependent variable: residency index.
## Fit binomial GLM to average residency metric data (proportional between 0-1)
# A binomial GLM fitted to a proportion needs the number of trials behind each
# proportion supplied through `weights`; without it R treats every proportion as
# a single 0/1 trial, warns about non-integer successes, and reports misleading
# standard errors. residence_metric is unique_days / days_at_liberty, so the
# number of trials for each row is days_at_liberty.
# NOTE(review): assumes days_at_liberty is a positive whole number of days - confirm.
species_glm = glm(residence_metric ~ species + fork_length_cm *days_at_liberty * species, data = detection_stats, weights = days_at_liberty, family = binomial(logit))
Error in eval(predvars, data, env) : object 'residence_metric' not found